Homepage
The formatted source code for this file is here.
And a raw version here.
Previous work by Youngser Park can be found here.

1 Data

Here we read in the data and select a random half of it for exploration.

featFull <- fread("../data/synapsinR_7thA.tif.Pivots.txt.2011Features.txt",showProgress=FALSE)

### Setting a seed and creating an index vector
### to select half of the data
set.seed(2^10)
half1 <- sample(dim(featFull)[1],dim(featFull)[1]/2)
half2 <- setdiff(1:dim(featFull)[1],half1)

feat <- featFull[half1,]
dim(feat)
# [1] 559649    144
## Setting the channel names
channel <- c('Synap_1','Synap_2','VGlut1_t1','VGlut1_t2','VGlut2','Vglut3',
              'psd','glur2','nmdar1','nr2b','gad','VGAT',
              'PV','Gephyr','GABAR1','GABABR','CR1','5HT1A',
              'NOS','TH','VACht','Synapo','tubuli','DAPI')

## Setting the channel types
channel.type <- c('ex.pre','ex.pre','ex.pre','ex.pre','ex.pre','in.pre.small',
                  'ex.post','ex.post','ex.post','ex.post','in.pre','in.pre',
                  'in.pre','in.post','in.post','in.post','in.pre.small','other',
                  'ex.post','other','other','ex.post','none','none')

channel.type2 <- c('ex.pre','ex.pre','ex.pre','ex.pre','ex.pre','other',
                   'ex.post','ex.post','ex.post','ex.post','in.pre','in.pre',
                   'in.pre','in.post','in.post','in.post','other','other',
                   'ex.post','other','other','ex.post','other','other')
nchannel <- length(channel)
nfeat <- ncol(feat) / nchannel

## Createing factor variables for channel and channel type sorted properly
ffchannel <- (factor(channel.type,
    levels= c("ex.pre","ex.post","in.pre","in.post","in.pre.small","other","none")
    ))
fchannel <- as.numeric(factor(channel.type,
    levels= c("ex.pre","ex.post","in.pre","in.post","in.pre.small","other","none")
    ))
ford <- order(fchannel)


## Setting up colors for channel types
Syncol <-  c("#197300","#5ed155","#660000","#cc0000","#ff9933","#0000cd","#ffd700")
Syncol3 <- c("#197300","#197300","#cc0000","#cc0000","#0000cd","#0000cd","#0000cd")
ccol <- Syncol[fchannel]
ccol3 <- Syncol3[fchannel]

exType <- factor(c(rep("ex",11),rep("in",6),rep("other",7)),ordered=TRUE)
exCol<-exType;levels(exCol) <- c("#197300","#990000","#0000cd");
exCol <- as.character(exCol)

fname <- as.vector(sapply(channel,function(x) paste0(x,paste0("F",0:5))))
names(feat) <- fname
fcol <- rep(ccol, each=6)
mycol <- colorpanel(100, "purple", "black", "green")
mycol2 <- matlab.like(nchannel)
mycol3 <- colorpanel(100, "blue","white","red")

1.1 Data transformations

f <- lapply(1:6,function(x){seq(x,ncol(feat),by=nfeat)})
featF <- lapply(f,function(x){subset(feat,select=x)})

featF0 <- featF[[1]]
f01e3 <- 1e3*data.table(apply(X=featF0,2,function(x){((x-min(x))/(max(x)-min(x)))}))

fs <- f01e3

### Taking log_10 on data + 1.
log1f <- log10(featF0 + 1)
slog1f <- data.table(scale(log1f, center=TRUE,scale=TRUE))

We now have the following data sets:

  • featF0: The feature vector looking only at the integrated brightness features.
  • fs: The feature vector scaled between \([0,1000]\).
  • logf1: The feature vector, plus one, then \(log_{10}\) is applied.
  • slog1f: The feature vector, plus one, \(log_{10}\), then scaled by subtracting the mean and dividing by the sample standard deviation.

2 Marker Exploration

2.1 Correlation Matrix of markers

corrf <- lapply(lapply(featF, cor), function(x) x[ford, ford])

titles <- paste('Correlation Matrix of', paste0("F", 0:5))
par(mfrow = c(3,2))
for(i in 1:length(corrf)) {
  corrplot(corrf[[i]],method="color",tl.col=ccol[ford], 
           tl.cex=0.8, mar=c(1,0,1,1.5))
  title(titles[i])
}
Figure 1: Correlation on untransformed F0 data, reordered by synapse type.

bford <- order(rep(fchannel,each=6))
nord <- Reduce('c', f)
cr <- rep(ccol, each=6)
corrfB <- cor(feat)[bford,bford]
corrplot(corrfB,method="color",tl.col=cr[bford],tl.cex=0.75)
Figure 2: Full Correlation on untransformed F0-F5 data, reordered by synapse type.

2.2 Distance Correlation T-tests

computeDcor <- function(x) {
  set.seed(317)
  sam1 <- sample(dim(x)[1], min(1e3,dim(x)[1]))
  tmp <- as.data.frame((x[sam1,]))
  
  combcols <- t(combn(24,2))
  
  dc <- foreach(i = 1:dim(combcols)[1]) %dopar% {
         set.seed(331*i)
         dcor.ttest(x=tmp[,combcols[i,1]],y=tmp[,combcols[i,2]])
         }
  
  ms <- matrix(as.numeric(0),24,24)
  mp <- matrix(as.numeric(0),24,24)
  
  for(i in 1:length(dc)){
      ms[combcols[i,1],combcols[i,2]] <- dc[[i]]$statistic
      ms[combcols[i,2],combcols[i,1]] <- dc[[i]]$statistic
      mp[combcols[i,1],combcols[i,2]] <- dc[[i]]$p.val
      mp[combcols[i,2],combcols[i,1]] <- dc[[i]]$p.val
  }
  
  rownames(ms) <- colnames(x)
  rownames(mp) <- colnames(x)
  colnames(ms) <- colnames(x)
  colnames(mp) <- colnames(x)
  
  diag(ms) <- as.numeric(0)
  diag(mp) <- as.numeric(1)
  return(list(ms, mp))
}

mdcor <- lapply(featF, computeDcor)
cl5 <- colorRampPalette(c("white", "blue"))
gr5 <- colorRampPalette(c("darkgreen", "white", "white"))
bl5 <- colorRampPalette(c("blue", "red"))

sTitle <- paste("dcor.ttest statistic", paste0("F", 0:5))
pTitle <- paste("dcor.ttest log10(p-value)", paste0("F", 0:5))


par(mfcol=c(6,2), oma=c(1,1,1,1))
for(i in 1:length(mdcor)){
  corrplot(mdcor[[i]][[1]][ford,ford],is.corr=FALSE,method="color",
           tl.col=ccol[ford], tl.cex=0.8, mar=c(1,0,1,1.5))
  title(sTitle[i]) 
  corrRect(as.numeric(table(fchannel)),col=Syncol,lwd=4)
}

for(i in 1:length(mdcor)){
  corrplot((log10(mdcor[[i]][[2]][ford,ford]+.Machine$double.eps)),is.corr=FALSE,method="color",tl.col=ccol[ford], 
           tl.cex=0.8, mar=c(1,0,1,1.5),
           p.mat=mdcor[[i]][[2]][ford,ford], 
           sig.level=0.01, pch = 'x', pch.cex=0.5)
  title(pTitle[i])
  corrRect(as.numeric(table(fchannel)),col=Syncol,lwd=4)
}
Figure 3: F0-5: Distance correlation t-test statistic and p-value matrices.

The X’s in the above right figure denote a p-value greater than 0.01.

2.3 Scatterplots

We will run PCA on the untransformed correlation matrix so the data can be viewed in 2-dimensions. The colors correspond to synapse type.

pcaL <- lapply(corrf, prcomp, center=TRUE, scale=TRUE)
titlepca <- paste("PCA on ", paste0("cor(F", 0:5, ')'))
for(i in 1:length(pcaL)) { 
  pairs(pcaL[[i]]$x[,1:3], col=ccol3[ford],pch=20, cex=2,
        main=titlepca[i])
}
Figure 4: PCA of untransformed correlation matrix

Figure 4: PCA of untransformed correlation matrix

Figure 4: PCA of untransformed correlation matrix

Figure 4: PCA of untransformed correlation matrix

Figure 4: PCA of untransformed correlation matrix

Figure 4: PCA of untransformed correlation matrix

pcaB <- prcomp(corrfB,center=TRUE, scale=TRUE)
pairs(pcaB$x[,1:3], col=cr[bford],pch=20, cex=2)
Figure 5: PCA of untransformed correlation matrix

plot(pcaB$x[,1:3], col=cr[bford],pch=20, cex=2)
text(pcaB$x[,1:2], labels=rownames(pcaB$x), pos=4, col=cr[bford])
Figure 6: PC2 v PC1 of untransformed correlation matrix

pcaB <- pcaB$x
rgl::plot3d(pcaB[,1],pcaB[,2],pcaB[,3],type='s',col=cr[bford], size=1)
rgl::rgl.texts(pcaB[,1],pcaB[,2],pcaB[,3],abbreviate(rownames(pcaB)), col=cr[bford], adj=c(0,2))
subid <- currentSubscene3d()
rglwidget(elementId="rgl-pcaB",width=720,height=720)

3 LDA

el2 <- lapply(pcaL, function(x) getElbows(x$sdev, plot=FALSE))
del2 <- lapply(pcaL, function(x) {
                 elb <- getElbows(x$sdev, plot=FALSE)[1]
                 return(x$x[,1:elb])
        })

d1 <- lapply(del2, function(x) data.frame(type = exType, x))
lda.fit <- lapply(d1, function(x) lda(type ~ ., data = x) )
rf.fit <- lapply(d1, function(x) randomForest(type ~ ., data=x))
rf.pred <- lapply(rf.fit, '[[', 3)

LDA was run using only the principal components up to and including the first elbow from ZG from the untransformed correlation matrix.

titlesvor <- paste("LDA decision boundaries for", paste0("F", 0:5))
voronoidf <- lapply(lapply(lda.fit, '[[', 3), data.frame)
#voronoidf <- data.frame(x=lda.fit$means[,1],y=lda.fit$means[,2])

#This creates the voronoi line segments

par(mfrow = c(3,2))
for(i in 1:length(d1)){
  plot(d1[[i]][,2:3], col=ccol3[ford], pch=20, cex=1.5)
  title(titlesvor[i])
  text(d1[[i]][,2:3], labels=rownames(d1[[i]]), 
       pos=ifelse(d1[[i]][,2]<max(d1[[i]][,2] -0.5),4,2), 
       col=ccol3[ford], cex=1.2)

  deldir(x = voronoidf[[i]][,1],y = voronoidf[[i]][,2], rw = c(-15,15,-15,15), 
       plotit=TRUE, add=TRUE, wl='te')
  text(voronoidf[[i]], labels=rownames(voronoidf[[i]]), cex=1.5, pos=1)
}
Figure 8: Voronoi diagrams on class means from LDA on PCA of untransformed correlation matrices

truth <- lapply(d1, '[[', 1)
pred <- lapply(lapply(lda.fit, predict), '[[', 1)

for( i in 1:length(truth)) {
 print(table(truth = truth[[i]],pred = pred[[i]]) )
}
#        pred
# truth   ex in other
#   ex    10  0     1
#   in     1  5     0
#   other  1  0     6
#        pred
# truth   ex in other
#   ex    10  0     1
#   in     1  5     0
#   other  1  0     6
#        pred
# truth   ex in other
#   ex    10  0     1
#   in     1  5     0
#   other  0  2     5
#        pred
# truth   ex in other
#   ex     9  0     2
#   in     1  5     0
#   other  0  2     5
#        pred
# truth   ex in other
#   ex    10  0     1
#   in     1  5     0
#   other  1  0     6
#        pred
# truth   ex in other
#   ex    11  0     0
#   in     0  6     0
#   other  0  0     7
lda.err <- list()
for(i in 1:6) {
 tr <- as.numeric(truth[[i]])
 pr <- as.numeric(pred[[i]])
 lda.err[[i]] <- sum( tr != pr ) / length(tr)
}

Reduce('c', lda.err)
# [1] 0.1250000 0.1250000 0.1666667 0.2083333 0.1250000 0.0000000

The above gives the LDA error rates for each of the features.